Histotime Analysis

Author

Matthew Zatzman

Published

April 16, 2025

library(tidyverse)
library(ggthemes)
library(ggsci)
library(dittoSeq)
library(patchwork)
library(glue)
library(ggh4x)
library(ggnewscale)
library(patchwork)
library(SingleCellExperiment)
library(Seurat)
library(ggbeeswarm)
library(slingshot)
library(tradeSeq)
library(here)

devtools::load_all()

colors <- load_colors()
markers <- load_markers()

theme_set(theme_classic())

symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, Inf), symbols = c("p<0.0001", "p<0.001", "p<0.01", "p<0.05", "ns"))
symnum_0.1 <- list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 0.1, Inf), symbols = c(
  "p<0.0001",
  "p<0.001", "p<0.01", "p<0.05", "p<0.1", "ns"
))

symnum_0.1_star <- list(cutpoints = c(0, 1e-04, 0.001, 0.01, 0.05, 0.1, Inf), symbols = c(
  "****",
  "***", "**", "*", "^", "ns"
))

Load objects

sce_ti <- readRDS(here("paper_data", "slingshot_sce_v2.rds"))
srt_ti <- readRDS(here("paper_data", "egfr_histo.rds"))
sce_gam <- readRDS(here("paper_data", "sce_gam_v2.rds"))

srt_ti$time_point <- fct_relevel(srt_ti$time_point, "TN", "MRD", "PD")

pseudo.paths <- slingPseudotime(sce_ti)
# Taking the rowMeans just gives us a single pseudo-time for all cells. Cells
# in segments that are shared across paths have similar pseudo-time values in
# all paths anyway, so taking the rowMeans is not particularly controversial.
sce_ti$pseudotime <- rowMeans(pseudo.paths, na.rm = TRUE)
srt_ti$pseudotime <- sce_ti$pseudotime
srt_ti$pseudotime_scaled <- standardize(srt_ti$pseudotime)

Curves

embedded <- embedCurves(sce_ti, "PCA_GENES")
embedded <- slingCurves(embedded)

ti_curves <- purrr::map(embedded, .f = function(l) {
  as.data.frame(l$s[l$ord, ])
})

curves_df <- purrr::map(seq_along(ti_curves), .f = function(i) {
  df <- ti_curves[[i]]
  df$lineage <- i
  return(df)
}) %>%
  list_rbind()
curves_df$lineage <- factor(curves_df$lineage, levels = c(1, 2), labels = c("LUSC", "SCLC"))
trim_v <- c(0, 1)

srt_ti$LUAD_s <- standardize(srt_ti$LUAD, trim = trim_v)
srt_ti$LUSC_s <- standardize(srt_ti$LUSC, trim = trim_v)
srt_ti$SCLC_s <- standardize(srt_ti$SCLC, trim = trim_v)
srt_ti$MAPK_s <- standardize(srt_ti$REACTOME_MAPK_TARGETS_NUCLEAR_EVENTS_MEDIATED_BY_MAP_KINASES, trim = trim_v)

p_list <- FeaturePlot(srt_ti, features = c("histotime", "LUAD_s", "LUSC_s", "SCLC_s", "MAPK_s"), reduction = "histo", raster = F, order = T, combine = F, pt.size = 0.1) %>%
  purrr::map(., .f = function(p) {
    p +
      scale_color_viridis_c(option = "inferno", breaks = c(0, 1), labels = c(0, 1)) +
      NoAxes() +
      guides(color = guide_colorbar(keywidth = 0.4, keyheight = 3))
  })

names(p_list) <- c("Histotime", "LUAD", "LUSC", "SCLC", "Nuclear MAPK (Reactome)")

p_list$Histotime <- p_list$Histotime +
  ggnewscale::new_scale_color() +
  geom_path(arrow = arrow(angle = 30, length = unit(0.4, "lines"), ends = "last", type = "closed"), data = curves_df, aes(group = lineage, x = pcagenes_1, y = pcagenes_2), linewidth = 2, alpha = 1, color = "black", show.legend = F) +
  geom_path(arrow = arrow(angle = 30, length = unit(0.4, "lines"), ends = "last", type = "closed"), data = curves_df, aes(color = lineage, x = pcagenes_1, y = pcagenes_2), linewidth = 1, alpha = 1) +
  scale_color_manual(values = colors$histology_predominant_short, name = "Lineage") +
  guides(color = guide_legend(position = "inside", override.aes = list(linewidth = 1, geom = "line"))) +
  theme(legend.position.inside = c(0.8, 0.5))


p_list <- purrr::map(names(p_list), .f = function(nm) {
  p <- p_list[[nm]] +
    ggtitle(nm)
  ggrastr::rasterise(p, layers = "Point", dpi = 300)
})
names(p_list) <- c("Histotime", "LUAD", "LUSC", "SCLC", "Nuclear MAPK (Reactome)")


layout <- c(
  area(t = 2, b = 3, l = 1, r = 2),
  area(t = 1, b = 2, l = 3, r = 4),
  area(t = 3, b = 4, l = 3, r = 4),
  area(t = 1, b = 2, l = 5, r = 6),
  area(t = 3, b = 4, l = 5, r = 6)
)

pcomb <- wrap_plots(p_list, design = layout)

size_scale <- 1.4

ggsave(pcomb, file = here("plots", "psuedotime_traj.pdf"), width = 12 / size_scale, height = 8 / size_scale)
pcomb

Lineage Genes

# A full version
ysmooth_df_full <- predictSmooth(models = sce_gam, gene = rownames(sce_gam), nPoints = 100, tidy = TRUE) %>%
  mutate(log_counts = log1p(yhat)) %>%
  mutate(lineage = factor(lineage, levels = c(1, 2), labels = c("LUSC", "SCLC"))) %>%
  group_by(gene, lineage) %>%
  mutate(
    pseudotime_rank = 1:n(),
    time_scaled = standardize(pseudotime_rank)
  ) %>%
  ungroup() %>%
  group_by(gene) %>%
  mutate(
    log_counts_scaled = scale(log_counts)[, 1],
    log_counts_standardized = standardize(log_counts)
  ) %>%
  ungroup()

All epi markers

pls <- map(names(markers$epi_markers_final), .f = function(ct) {
  glist <- markers$epi_markers_final[[ct]]

  ysmooth_df_full %>%
    dplyr::filter(gene %in% glist) %>%
    ggplot(aes(x = time_scaled, y = log_counts_standardized, color = gene, group = gene)) +
    labs(y = "Expression", x = "Psuedotime", linetype = "Lineage", title = ct, color = NULL) +
    geom_line(linewidth = 1) +
    scale_color_nejm() +
    guides(linetype = guide_legend(override.aes = list(linewidth = 1))) +
    scale_x_continuous(breaks = c(0, 1)) +
    theme(
      panel.border = element_rect(fill = NA, color = "black"),
      strip.background = element_blank(),
      axis.line = element_blank(),
      plot.title = element_text(size = 14, face = "bold", hjust = 0.5)
    ) +
    facet_grid(. ~ lineage, scales = "free_y")
})
names(pls) <- names(markers$epi_markers_final)

pcomb <- wrap_plots(pls, ncol = 3)
ggsave(pcomb, file = here("plots", "histotime_all_epi_types.pdf"), width = 14, height = 10)
pcomb

Select genes

luad_loss_genes <- c("SFTPB", "SFTPC", "SFTPD", "ETV5") # luad loss

p_luad_loss <- ysmooth_df_full %>%
  group_by(gene) %>%
  mutate(log_counts_standardized = standardize(log_counts)) %>%
  dplyr::filter(gene %in% luad_loss_genes) %>%
  ggplot(aes(x = time_scaled, y = log_counts_standardized, color = gene, group = gene)) +
  geom_line(linewidth = 1) +
  labs(y = "Expression", color = "AT2 Markers", x = "Psuedotime", linetype = "Lineage") +
  scale_color_nejm() +
  guides(linetype = guide_legend(override.aes = list(linewidth = 1))) +
  scale_x_continuous(breaks = c(0, 1)) +
  theme(
    panel.border = element_rect(fill = NA, color = "black"),
    strip.background = element_blank(),
    axis.line = element_blank()
  ) +
  facet_grid(. ~ lineage, scales = "free_y")

lusc_gain_genes <- c("TP63", "KRT5", "KRT15", "KRT17")

p_lusc_gain <- ysmooth_df_full %>%
  dplyr::filter(gene %in% lusc_gain_genes) %>%
  mutate(gene = fct_relevel(gene, lusc_gain_genes)) %>%
  ggplot(aes(x = time_scaled, y = log_counts_standardized, color = gene, group = gene)) +
  geom_line(linewidth = 1) +
  labs(y = "Expression", color = "Basal Markers", x = "Psuedotime", linetype = "Lineage") +
  scale_color_nejm() +
  guides(linetype = guide_legend(override.aes = list(linewidth = 1))) +
  scale_x_continuous(breaks = c(0, 1)) +
  facet_grid(. ~ lineage)

sclc_gain_genes <- c("ASCL1", "NCAM1", "NEUROD1", "SYP") # SCLC

p_sclc_gain <- ysmooth_df_full %>%
  dplyr::filter(gene %in% sclc_gain_genes) %>%
  ggplot(aes(x = time_scaled, y = log_counts_standardized, color = gene, group = gene)) +
  geom_line(linewidth = 1) +
  labs(y = "Expression", color = "NE Markers", x = "Psuedotime", linetype = "Lineage") +
  scale_color_nejm() +
  scale_linetype_manual(values = c("solid", "dotted")) +
  guides(linetype = guide_legend(override.aes = list(linewidth = 1))) +
  scale_x_continuous(breaks = c(0, 1)) +
  facet_grid(. ~ lineage)

pcomb <- (p_luad_loss) + (p_lusc_gain + theme(strip.text.x = element_blank())) + (p_sclc_gain + theme(strip.text.x = element_blank())) + plot_layout(ncol = 1, axes = "collect", axis_titles = "collect") &
  theme(
    panel.border = element_rect(fill = NA, color = "black"),
    strip.background = element_blank(),
    axis.line = element_blank(), plot.margin = margin(0.8, 0.5, 0.8, 0.5, "mm"),
    legend.margin = margin()
  ) &
  guides(color = guide_legend(keyheight = 0.5, keywidth = 0.5)) &
  scale_y_continuous(breaks = c(0, 1), labels = c(0, 1)) &
  labs(x = "Histotime", y = "Scaled Expression")
ggsave(pcomb, file = here("plots", "histotime_example_genes.pdf"), width = 3.75, height = 3.5)
pcomb

Cancer cell states over pseudotime

p1 <- srt_ti@meta.data %>%
  ggplot(aes(x = standardize(pseudotime), y = fct_reorder(cell_type_epi, -pseudotime, median))) +
  # geom_violin(scale = "width") +
  ggrastr::rasterize(geom_quasirandom(aes(color = cell_type_epi), size = 0.1, alpha = 0.5, show.legend = F), dpi = 1000) +
  geom_pointrange(
    pch = 21,
    stat = "summary", fatten = 2, color = "black", fill = "white", stroke = 0.5,
    fun.min = function(z) {
      quantile(z, 0.25)
    },
    fun.max = function(z) {
      quantile(z, 0.75)
    },
    fun = median, position = position_dodge(width = 0.8)
  ) +
  guides(x = guide_axis(cap = "upper")) +
  scale_color_manual(values = colors$cell_type_epi) +
  scale_x_continuous(breaks = c(0, 1)) +
  labs(x = "Histotime", y = "Epithelial cell state") +
  theme(plot.margin = margin(1, 0, 1, 1, "mm"))

# Distribution over the branches
p2 <- srt_ti@meta.data %>%
  ggplot(aes(y = fct_reorder(cell_type_epi, -pseudotime_scaled, median))) +
  geom_bar(aes(fill = lineage_assignment), position = "fill") +
  labs(fill = "Branch", x = "Prop.", y = NULL) +
  scale_x_continuous(expand = c(0, 0), breaks = c(0, 1)) +
  guides(
    x = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4, override.aes = list(color = NULL))
  ) +
  scale_fill_manual(values = colors$histology_predominant_short) +
  theme(
    axis.text.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.margin = margin(1, 1, 1, 0, "mm")
  )


pcomb <- p1 + p2 + plot_layout(nrow = 1, widths = c(1, 0.4))
ggsave(pcomb, file = here("plots", "histotime_epi_types_dotviolin_wprop.pdf"), width = 3.75, height = 3.25)
pcomb

Quantify proportions

df <- srt_ti@meta.data %>%
  add_count(cell_type_epi, lineage_assignment, name = "numer") %>%
  add_count(cell_type_epi, name = "denom") %>%
  mutate(prop = numer / denom) %>%
  distinct(cell_type_epi, lineage_assignment, numer, denom, prop) %>%
  arrange(cell_type_epi, lineage_assignment) %>%
  mutate(prop = round(prop, digits = 3))
knitr::kable(df)
cell_type_epi lineage_assignment numer denom prop
AT1-like LUAD 7191 7208 0.998
AT1-like LUSC 17 7208 0.002
AT2-like LUAD 45163 45211 0.999
AT2-like LUSC 40 45211 0.001
AT2-like SCLC 8 45211 0.000
AT2-like PDTC LUAD 17182 18466 0.930
AT2-like PDTC LUSC 528 18466 0.029
AT2-like PDTC SCLC 756 18466 0.041
Atypical LUAD 818 857 0.954
Atypical LUSC 24 857 0.028
Atypical SCLC 15 857 0.018
Basal-like LUAD 516 996 0.518
Basal-like LUSC 475 996 0.477
Basal-like SCLC 5 996 0.005
Cycling LUAD 4604 6576 0.700
Cycling LUSC 945 6576 0.144
Cycling SCLC 1027 6576 0.156
Hepatocyte-like LUAD 1256 1267 0.991
Hepatocyte-like LUSC 4 1267 0.003
Hepatocyte-like SCLC 7 1267 0.006
Multiciliated LUAD 1798 1967 0.914
Multiciliated LUSC 23 1967 0.012
Multiciliated SCLC 146 1967 0.074
Neuroendocrine LUAD 191 1278 0.149
Neuroendocrine LUSC 45 1278 0.035
Neuroendocrine SCLC 1042 1278 0.815
PDTC 1 LUAD 26421 28271 0.935
PDTC 1 LUSC 1610 28271 0.057
PDTC 1 SCLC 240 28271 0.008
PDTC 2 LUAD 3099 4479 0.692
PDTC 2 LUSC 1349 4479 0.301
PDTC 2 SCLC 31 4479 0.007
PDTC 3 LUAD 4494 6367 0.706
PDTC 3 LUSC 1805 6367 0.283
PDTC 3 SCLC 68 6367 0.011
PDTC 4 LUAD 2304 5026 0.458
PDTC 4 LUSC 2488 5026 0.495
PDTC 4 SCLC 234 5026 0.047

Ucell score within lineages

Load pathways

meta_programs_df <- read.table(file = "https://raw.githubusercontent.com/mjz1/meta_programs_tirosh/refs/heads/main/tirosh_mp_patched.txt", header = T, sep = "\t")

meta_programs <- split(meta_programs_df, meta_programs_df$cell_type)

meta_programs <- map(meta_programs, .f = function(x) {
  res <- split(x, x$meta_program)
  map(res, .f = function(y) y$Gene)
})

names(meta_programs) <- janitor::make_clean_names(names(meta_programs))

pathways_v2 <- read.table(here("paper_data", "egfr_pathways.txt"), header = T, sep = "\t")


all_paths <- c(meta_programs$malignant, split(pathways_v2$Gene, pathways_v2$pathway))
if (!file.exists(here("paper_data", "ucell_scores_histotime.txt"))) {
  library(UCell)
  library(BiocParallel)
  mat_full <- predictSmooth(models = sce_gam, gene = rownames(sce_gam), nPoints = 100, tidy = FALSE)

  ranks <- StoreRankings_UCell(matrix = mat_full, BPPARAM = MulticoreParam(workers = 16, progressbar = T))

  histo_scores <- ScoreSignatures_UCell(precalc.ranks = ranks, features = all_paths, name = "", BPPARAM = MulticoreParam(workers = 16, progressbar = T))

  ucell_df <- histo_scores %>%
    as.data.frame() %>%
    rownames_to_column("id") %>%
    separate(id, into = c("lineage_no", "pseudotime_rank")) %>%
    mutate(pseudotime = as.numeric(pseudotime_rank) / 100) %>%
    mutate(lineage = factor(lineage_no, levels = c("lineage1", "lineage2"), labels = c("LUSC", "SCLC"))) %>%
    pivot_longer(cols = colnames(histo_scores), names_to = "features.plot", values_to = "Score") %>%
    mutate(features.plot = fct_recode(
      features.plot,
      "LUAD" = "LUAD_GIRARD",
      "LUSC" = "LUSC_GIRARD",
      "SCLC" = "SCLC_CCLE"
    ))

  write.table(ucell_df, file = here("paper_data", "ucell_scores_histotime.txt"), col.names = T, row.names = F, quote = F, sep = "\t")
} else {
  ucell_df <- read.table(file = here("paper_data", "ucell_scores_histotime.txt"), header = T, sep = "\t")
}

Select pathways

emt_paths <- grep("emt|HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION", ignore.case = T, unique(ucell_df$features.plot), value = T)
myc_paths <- c(grep("myc", ignore.case = T, unique(ucell_df$features.plot), value = T))
stemness <- c("WONG_EMBRYONIC_STEM_CELL_CORE", "BENPORATH_PRC2_TARGETS")
metabolic_paths <- c("HALLMARK_GLYCOLYSIS", "HALLMARK_OXIDATIVE_PHOSPHORYLATION", "HALLMARK_MTORC1_SIGNALING")
histological_malignant <- c("LUAD", "LUSC", "SCLC")
histological <- c("TRAVAGLINI_LUNG_ALVEOLAR_EPITHELIAL_TYPE_1_CELL", "TRAVAGLINI_LUNG_ALVEOLAR_EPITHELIAL_TYPE_2_CELL", "alveolar", "TRAVAGLINI_LUNG_BASAL_CELL", "TRAVAGLINI_LUNG_CILIATED_CELL", "TRAVAGLINI_LUNG_CLUB_CELL", "TRAVAGLINI_LUNG_NEUROENDOCRINE_CELL", "TRAVAGLINI_LUNG_PROLIFERATING_BASAL_CELL", "TRAVAGLINI_LUNG_PROXIMAL_BASAL_CELL")
cell_cycle <- c(grep("cycle", unique(ucell_df$features.plot), ignore.case = T, value = T))
mapk <- c("REACTOME_MAPK_FAMILY_SIGNALING_CASCADES", "REACTOME_MAPK_TARGETS_NUCLEAR_EVENTS_MEDIATED_BY_MAP_KINASES", "BIOCARTA_MAPK_PATHWAY", "KEGG_MAPK_SIGNALING_PATHWAY", "REACTOME_SIGNALING_BY_EGFR")


pth_list <- list(
  "EMT" = emt_paths,
  "MYC" = myc_paths,
  "Stem" = stemness,
  "Metabolic" = metabolic_paths,
  "Lung Cancer" = histological_malignant,
  "Histological" = histological,
  "Cell Cycle" = cell_cycle,
  "MAPK" = mapk
)

pth_cat_df <- enframe(pth_list, name = "Category", value = "features.plot") %>%
  unnest(cols = "features.plot")

pth_df <- ucell_df %>%
  filter(features.plot %in% unlist(pth_list)) %>%
  left_join(pth_cat_df) %>%
  filter(!grepl("TGFB", features.plot)) %>%
  mutate(pathway = case_when(
    grepl("^TRAVAGLINI_LUNG_", features.plot) ~ str_to_title(gsub("_", " ", gsub("TRAVAGLINI_LUNG_", "", features.plot))),
    features.plot == "alveolar" ~ "Alveolar Signature",
    grepl("^HALLMARK", features.plot) ~ str_to_title(gsub("_", " ", gsub("HALLMARK_", "", features.plot))),
    features.plot == "emt_i" ~ "EMT (I)",
    features.plot == "emt_ii" ~ "EMT (II)",
    features.plot == "emt_iii" ~ "EMT (III)",
    features.plot == "emt_iv" ~ "EMT (IV)",
    features.plot == "S.Score" ~ "S Score",
    features.plot == "G2M.Score" ~ "G2M Score",
    features.plot == "cell_cycle_g1_s" ~ "G1S",
    features.plot == "cell_cycle_g2_m" ~ "G2M",
    features.plot == "cell_cycle_hmg_rich" ~ "HMG Rich",
    features.plot == "myc" ~ "MYC",
    features.plot == "WONG_EMBRYONIC_STEM_CELL_CORE" ~ "Embryonic Stem Cell (Wong)",
    features.plot == "BENPORATH_PRC2_TARGETS" ~ "PRC2 Targets (Ben-Porath)",
    features.plot == "REACTOME_MAPK_FAMILY_SIGNALING_CASCADES" ~ "MAPK (Reactome)",
    features.plot == "REACTOME_MAPK_TARGETS_NUCLEAR_EVENTS_MEDIATED_BY_MAP_KINASES" ~ "Nuclear MAPK (Reactome)",
    features.plot == "REACTOME_ONCOGENIC_MAPK_SIGNALING" ~ "Oncogenic MAPK (Reactome)",
    features.plot == "BIOCARTA_MAPK_PATHWAY" ~ "MAPK (Biocarta)",
    features.plot == "KEGG_MAPK_SIGNALING_PATHWAY" ~ "MAPK (KEGG)",
    features.plot == "REACTOME_SIGNALING_BY_EGFR" ~ "EGFR (Reactome)",
    .default = features.plot
  )) %>%
  # Fix additional
  mutate(pathway = case_when(
    pathway == "Mtorc1 Signaling" ~ "MTORC1",
    pathway == "Epithelial Mesenchymal Transition" ~ "EMT",
    pathway == "Alveolar Epithelial Type 2 Cell" ~ "AT2 Cell",
    pathway == "Alveolar Epithelial Type 1 Cell" ~ "AT1 Cell",
    pathway == "Oxidative Phosphorylation" ~ "OxPhos",
    grepl("Myc", pathway) ~ gsub("Myc", "MYC", pathway),
    .default = pathway
  )) %>%
  mutate(
    pathway_source = case_when(
      features.plot %in% names(meta_programs$malignant) ~ "Malignant Metaprograms",
      grepl("^TRAVAGLINI", features.plot) ~ "Travaglini",
      grepl("^HALLMARK_", features.plot) ~ "Hallmark Pathways",
      # features.plot %in% c("S.Score", "G2M.Score") ~ "Seurat",
      .default = "Curated"
    )
  ) %>%
  # Remove extra myc pathways since they all show the same thing
  filter(
    !(grepl("MYC", features.plot, ignore.case = T) & pathway_source == "Curated")
  ) %>%
  arrange(desc(pathway_source), desc(pathway))

# constructive::construct(unique(pth_df$pathway))
pth_cat_order <- c(
  "Histological", "Lung Cancer", "MAPK", "EMT", "MYC",
  "Stem", "Metabolic", "Cell Cycle"
)

pth_order <- c(
  "G1S", "G2M", "G2M Score", "HMG Rich", "S Score",
  "EMT (I)", "EMT (II)", "EMT (III)", "EMT (IV)", "EMT",
  "LUAD", "LUSC", "SCLC",
  "AT1 Cell", "AT2 Cell", "Alveolar Signature", "Basal Cell", "Proximal Basal Cell",
  "Proliferating Basal Cell", "Club Cell", "Ciliated Cell",
  "Neuroendocrine Cell", "MYC", "MYC Targets V1", "MYC Targets V2",
  "Glycolysis", "OxPhos", "MTORC1", "Embryonic Stem Cell (Wong)",
  "PRC2 Targets (Ben-Porath)",
  "MAPK (KEGG)", "MAPK (Biocarta)", "Nuclear MAPK (Reactome)", "MAPK (Reactome)", "EGFR (Reactome)"
)

pth_df <- pth_df %>%
  mutate(
    pathway = fct_relevel(pathway, rev(pth_order)),
    Category = fct_relevel(Category, pth_cat_order)
  )
p1 <- pth_df %>%
  ggplot(aes(x = "Pathway Source", y = pathway)) +
  geom_tile(aes(fill = pathway_source), color = "black") +
  scale_fill_pander() +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  theme(
    axis.line.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.line.y = element_blank(), legend.position = "left", strip.background = element_blank(), strip.text = element_blank()
  ) +
  guides(
    x = guide_axis(angle = 90),
    fill = guide_legend(keywidth = 0.5, keyheight = 0.5, ncol = 2)
  ) +
  facet_grid(Category ~ ., scales = "free", space = "free") +
  labs(y = NULL, x = NULL, fill = "Pathway Source")


p2 <- pth_df %>%
  group_by(pathway) %>%
  mutate(Score_scaled = scale(Score)[, 1]) %>%
  ggplot(aes(x = pseudotime, y = pathway)) +
  geom_tile(aes(fill = Score_scaled)) +
  facet_grid(Category ~ lineage, scales = "free", space = "free") +
  scale_fill_viridis_c(option = "inferno", limits = c(-2, 2), oob = scales::squish) +
  # scale_fill_gradient2(low = scales::muted("blue"), high = scales::muted("red"), limits = c(-1, 2), oob = scales::squish,
  # labels = c(-1, 0, 1, 2), breaks = c(-1, 0, 1, 2)) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks = element_blank(), axis.line = element_blank(),
    strip.background = element_blank(),
    panel.border = element_rect(fill = NA, color = "black"),
    legend.position = "bottom",
    strip.text.y = element_text(hjust = 0, angle = 0)
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(expand = c(0, 0)) +
  labs(x = "Histotime", y = NULL, fill = "Expression") +
  guides(fill = guide_colorbar(keywidth = 3, keyheight = 0.4))
# ggsave(p1, file = here("plots", "histotime_signature_heatplot.pdf"), width = 5, height = 6)


sig_heatplot <-
  free(
    (p1 +
      theme(plot.margin = margin(l = 0.25, r = 1, unit = "mm"))),
    type = "space", side = "b"
  ) +
    free(
      (p2 +
        theme(plot.margin = margin(r = 0.25, unit = "mm"), axis.text.y = element_blank(), axis.ticks.y = element_blank())),
      type = "label", side = "b"
    ) +
    plot_layout(guides = "collect", ncol = 2, widths = c(0.03, 1)) +
    plot_annotation(theme = theme(legend.position = "bottom")) &
    theme(
      panel.spacing = unit(1, "mm"),
      panel.border = element_rect(fill = NA, color = "black", linewidth = 0.5),
      legend.box.margin = margin(), legend.title.position = "top", legend.title = element_text(hjust = 0.5)
    ) &
    coord_cartesian(clip = "off")
ggsave(sig_heatplot, file = here("plots", "histotime_signature_heatmap.pdf"), width = 5, height = 6)
sig_heatplot

Histoviolins

Histotime weights

Weights of cells in each lineage along psuedotime

srt_ti <- AddMetaData(srt_ti, slingCurveWeights(sce_ti))
srt_ti$lineage_probability <- srt_ti$Lineage2 - srt_ti$Lineage1
srt_ti$lineage_sum <- srt_ti$Lineage2 + srt_ti$Lineage1

srt_ti$lineage_probability2 <- srt_ti$lineage_probability * srt_ti$histotime

Leaning cell assignment

srt_ti$lin_leaning <- abs(srt_ti$lineage_probability2) > 0.05

srt_ti$lineage_lean_05 <- srt_ti@meta.data %>%
  mutate(lineage_lean = case_when(
    abs(lineage_probability2) <= 0.05 ~ "LUAD",
    lineage_probability2 > 0 ~ "SCLC",
    lineage_probability2 < 0 ~ "LUSC",
    .default = "ERROR"
  )) %>%
  pull(lineage_lean)

srt_ti$lineage_lean_20 <- srt_ti@meta.data %>%
  mutate(lineage_lean = case_when(
    abs(lineage_probability2) <= 0.2 ~ "LUAD",
    lineage_probability2 > 0 ~ "SCLC",
    lineage_probability2 < 0 ~ "LUSC",
    .default = "ERROR"
  )) %>%
  pull(lineage_lean)

Proportion plot summarized

p_lineage_prop <- srt_ti@meta.data %>%
  add_count(sample_id, name = "denom") %>%
  add_count(sample_id, lineage_lean_20, name = "numer") %>%
  mutate(prop = numer / denom) %>%
  # filter(n >= 20) %>%
  distinct(sample_id, time_point, lineage_lean_20, prop, numer, denom, histology_predominant_short) %>%
  # mutate(time_point = fct_recode(time_point, "PD" = "Progression")) %>%
  # left_join(sample_meta) %>%
  complete(lineage_lean_20, nesting(sample_id, time_point, histology_predominant_short), fill = list(prop = 0)) %>%
  mutate(histology_predominant_short = fct_relevel(histology_predominant_short, "LUAD", "LUSC", "SCLC", "Poorly Diff.")) %>%
  arrange((histology_predominant_short)) %>%
  ggplot(aes(x = time_point, y = prop)) +
  see::geom_jitter2(aes(color = histology_predominant_short), alpha = 0.8, width = 0.35) +
  facet_nested(. ~ lineage_lean_20, scales = "free", space = "free", switch = "x") +
  scale_color_manual(values = colors$histology_predominant_short) +
  ggpubr::geom_pwc(tip.length = 0, symnum.args = symnum.args, label = "p.adj.signif", p.adjust.method = "BH", label.size = 3) +
  scale_y_continuous(breaks = seq(0, 1, by = 0.25), expand = expansion(mult = c(0.05, 0.1))) +
  guides(
    x = guide_axis(cap = "both"),
    y = guide_axis(cap = "both")
  ) +
  labs(color = "Clinical Histology") +
  theme(
    strip.background.x = element_part_rect(side = "t", fill = NA), strip.placement = "outside",
    axis.title.y = element_text(hjust = 0.3)
  ) +
  labs(x = NULL, y = "Proportion\nCommitted\n(>20%)")
# ggsave(p_lineage_prop, file = here("plots", "lineage_prop_jitter_v2.pdf"), width = 6, height = 2.5)
p_lineage_prop

m <- p_lineage_prop$data %>%
  pivot_wider(id_cols = "sample_id", names_from = lineage_lean_20, values_from = prop) %>%
  column_to_rownames("sample_id") %>%
  as.matrix()

scrna_histo <- apply(m, MARGIN = 1, FUN = function(x) {
  colnames(m)[which(x == max(x))]
}) %>%
  enframe(name = "sample_id", value = "scrna_histology")

srt_ti$scrna_histo <- scrna_histo$scrna_histology[match(srt_ti$sample_id, scrna_histo$sample_id)]

Dedifferentiation score

If we rescale within the core luad branch we approximate a luad dedifferentiation score.

# quantile(srt_ti@meta.data[srt_ti$lineage_assignment == "LUAD", ]$pseudotime_scaled, prob = c(0.05, 0.99))

srt_ti@meta.data %>%
  filter(lineage_assignment == "LUAD") %>%
  ggplot(aes(x = pseudotime_scaled)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = quantile(srt_ti@meta.data[srt_ti$lineage_assignment == "LUAD", ]$pseudotime_scaled, prob = c(0.05, 0.99)))

Overlap with the other branches?

srt_ti@meta.data %>%
  ggplot(aes(x = lineage_assignment, y = pseudotime_scaled, fill = lineage_assignment)) +
  geom_violin() +
  scale_fill_manual(values = colors$histology_predominant_short) +
  geom_hline(yintercept = quantile(srt_ti@meta.data[srt_ti$lineage_assignment == "LUAD", ]$pseudotime_scaled, prob = c(0.05, 0.99)))

srt_ti$luad_dediff <- srt_ti$pseudotime_scaled
cuts <- quantile(srt_ti@meta.data[srt_ti$lineage_assignment == "LUAD", ]$pseudotime_scaled, prob = c(0.05, 0.99))

srt_ti$luad_dediff[srt_ti$luad_dediff < cuts[1]] <- cuts[1]
srt_ti$luad_dediff[srt_ti$luad_dediff > cuts[2]] <- cuts[2]

srt_ti$luad_dediff <- standardize(srt_ti$luad_dediff)
p_linprob_dediff <- srt_ti@meta.data %>%
  arrange(desc(abs(luad_dediff))) %>%
  # filter(lineage_assignment == "LUAD") %>%
  ggplot(aes(x = fct_reorder(sample_id, (lineage_probability2), mean), y = luad_dediff)) +
  geom_quasirandom(size = 0.01, bandwidth = 3, aes(color = luad_dediff)) +
  guides(
    x = guide_axis(angle = 90),
    color = guide_colorbar(keywidth = 0.4, keyheight = 3)
  ) +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_color_viridis_c(option = "inferno", breaks = c(0, 0.5, 1)) +
  labs(x = "Sample ID", y = "Dediff.\nScore ", color = "Dediff.\nScore") +
  theme(
    strip.background = element_blank(),
    axis.line = element_blank(),
    panel.border = element_rect(linewidth = 1, fill = NA, color = "black")
  )

p_linprob_dediff <- ggrastr::rasterise(p_linprob_dediff, layers = "Point", dpi = 300)
p_linprob_dediff

Lineage committment violin

# Of the possibly committed cells how much go each way in each sample
p_linprob_vln <- srt_ti@meta.data %>%
  arrange(desc(abs(lineage_probability2))) %>%
  ggplot(aes(x = fct_reorder(sample_id, (lineage_probability2), mean), y = lineage_probability2)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black", alpha = 1) +
  geom_quasirandom(size = 0.01, bandwidth = 2, aes(color = lineage_probability2)) +
  guides(
    x = guide_axis(angle = 90),
    color = guide_colorbar(keywidth = 0.4, keyheight = 3)
  ) +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_y_continuous(breaks = c(-1, 0, 1), limits = c(-1, 1), labels = c("LUSC", "LUAD", "SCLC")) +
  # scale_color_viridis_c(option = "turbo", breaks = c(-1, 1), labels = c("LUSC", "SCLC")) +
  scale_color_gradient2(high = colors$histology_predominant_short[["SCLC"]], low = colors$histology_predominant_short[["LUSC"]], mid = colors$histology_predominant_short[["LUAD"]], breaks = c(-1, -0.5, 0, 0.5, 1), limits = c(-1, 1), labels = c("LUSC", "", "LUAD", "", "SCLC")) +
  # colorspace::scale_color_continuous_divergingx(palette = "roma", breaks = c(-1, -0.5, 0, 0.5, 1), labels = c("LUSC", "", "LUAD", "", "SCLC")) +
  theme(
    strip.background = element_blank(),
    axis.line = element_blank(),
    panel.border = element_rect(linewidth = 1, fill = NA, color = "black")
  ) +
  labs(x = "Sample ID", y = "Lineage\nCommitment", color = "Lineage")

p_linprob_vln <- ggrastr::rasterise(p_linprob_vln, layers = "Point", dpi = 300)
p_linprob_vln

Merged violin

tb_pad <- 0.25

library(ggnewscale)

samp_order <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  mutate(sample_id = fct_reorder(sample_id, lineage_probability2, mean)) %>%
  pull(sample_id) %>%
  levels()

anno_df <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  distinct(sample_id, mapk_alt, tp53mut, rb1mut, site_of_tissue_simple, histology_predominant_short, scrna_histology, stage_of_tissue_simple, time_point, has_impact) %>%
  mutate(site_of_tissue_simple = factor(site_of_tissue_simple, levels = names(colors$site_of_tissue_simple))) %>%
  mutate(has_impact = as.logical(has_impact))
anno_df$tp53mut[!anno_df$has_impact] <- "NA"
anno_df$mapk_alt[anno_df$time_point %in% c("TN", "MRD")] <- "NA"
anno_df$rb1mut[!anno_df$has_impact] <- "NA"
anno_df$mapk_alt[anno_df$sample_id == "PD20"] <- "Unknown"

p_anno <- anno_df %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_tile(aes(y = "Clinical Histology", fill = histology_predominant_short), color = "black") +
  scale_fill_manual(
    name = "Clinical Histology", values = colors$histology_predominant_short,
    guide = guide_legend(order = 1, keywidth = 0.4, keyheight = 0.5, ncol = 2, position = "bottom", title.position = "top")
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "scRNA Histology", fill = scrna_histology), color = "black") +
  scale_fill_manual(
    name = "scRNA Histology", values = colors$histology_predominant_short,
    guide = guide_legend(order = 2, keywidth = 0.4, keyheight = 0.5, ncol = 1, position = "bottom", title.position = "top")
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "Tumor Stage", fill = stage_of_tissue_simple), color = "black") +
  scale_fill_viridis_d(
    name = "Tumor Stage",
    guide = guide_legend(order = 3, keywidth = 0.4, keyheight = 0.5, ncol = 2, position = "bottom", title.position = "top")
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "Tissue Site", fill = site_of_tissue_simple), color = "black") +
  scale_fill_manual(
    name = "Tissue Site",
    values = colors$site_of_tissue_simple,
    guide = guide_legend(order = 4, keywidth = 0.4, keyheight = 0.5, ncol = 2, position = "bottom", title.position = "top")
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "TP53mut", fill = tp53mut), color = "black") +
  scale_fill_manual(
    name = "Alterations",
    values = c("TRUE" = "black", "FALSE" = "white", "NA" = "grey60"), breaks = c("TRUE", "FALSE", "NA"),
    labels = c("Detected", "Undetected", "Not applicable"),
    guide = guide_legend(order = 5, keywidth = 0.4, keyheight = 0.1, ncol = 1, position = "bottom", title.position = "top")
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "RB1mut", fill = rb1mut), color = "black", show.legend = F) +
  scale_fill_manual(
    values = c("TRUE" = "black", "FALSE" = "white", "NA" = "grey60"), breaks = c("TRUE", "FALSE", "NA"),
    labels = c("Detected", "Undetected", "Not applicable"),
    guide = "none"
  ) +
  new_scale_fill() +
  geom_tile(aes(y = "MAPKalt", fill = mapk_alt), color = "black", show.legend = F) +
  scale_fill_manual(
    values = c("MAPKalt" = "black", "Unknown" = "white", "NA" = "grey60"), breaks = c("TRUE", "FALSE", "NA"),
    labels = c("Detected", "Undetected", "Not applicable"),
    guide = "none"
  ) +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  theme_minimal() +
  theme(panel.background = element_blank(), panel.grid = element_blank()) +
  # guides(fill = guide_legend(keywidth = 0.4, keyheight = 0.5, ncol = 2)) +
  theme(strip.text = element_blank(), legend.title = element_text(hjust = 0.5), axis.text.y = element_text(size = 8)) +
  labs(fill = "Tissue Site", y = NULL) +
  scale_y_discrete(limits = rev(c(
    "Clinical Histology",
    "scRNA Histology",
    "Tumor Stage",
    "Tissue Site",
    "TP53mut",
    "RB1mut",
    "MAPKalt"
  )))

p_prop05 <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_bar(aes(fill = lineage_lean_05), position = "fill") +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_fill_manual(values = colors$histology_predominant_short) +
  guides(
    x = guide_axis(angle = 90),
    y = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4)
  ) +
  labs(y = "Proportion\nLeaning\n(>5%)", x = NULL, fill = "Lineage") +
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    strip.text = element_blank(),
    axis.text.x = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(angle = 0, vjust = 0.5)
  ) +
  scale_y_continuous(breaks = c(0, 0.5, 1))

p_branch_prop <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_bar(aes(fill = fct_relevel(lineage_assignment, names(colors$histology_predominant_short))), position = "fill") +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_fill_manual(values = colors$histology_predominant_short) +
  guides(
    x = guide_axis(angle = 90),
    y = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4)
  ) +
  labs(y = "Proportion\n(In branch)", x = NULL, fill = "Lineage") +
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    strip.text = element_blank(),
    axis.text.x = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(angle = 0, vjust = 0.5)
  ) +
  scale_y_continuous(breaks = c(0, 0.5, 1))

lean_prop <- 0.01

p_lean <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  mutate(lineage_lean = case_when(
    abs(lineage_probability2) <= lean_prop ~ "LUAD",
    lineage_probability2 > 0 ~ "SCLC",
    lineage_probability2 < 0 ~ "LUSC",
    .default = "ERROR"
  )) %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_bar(aes(fill = lineage_lean), position = "fill") +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_fill_manual(values = colors$histology_predominant_short) +
  guides(
    x = guide_axis(angle = 90),
    y = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4)
  ) +
  labs(y = glue("Proportion\nLeaning"), x = NULL, fill = "Lineage")



p_prop20 <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_bar(aes(fill = lineage_lean_20), position = "fill") +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_fill_manual(values = colors$histology_predominant_short) +
  guides(
    x = guide_axis(angle = 90),
    y = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4)
  ) +
  labs(y = "Proportion\nCommitted\n(>20%)", x = NULL, fill = "Lineage")

p_epi_type_prop <- srt_ti@meta.data %>%
  filter(!is.na(lineage_probability2)) %>%
  ggplot(aes(x = fct_relevel(sample_id, samp_order))) +
  geom_bar(aes(fill = cell_type_epi), position = "fill") +
  facet_grid(. ~ time_point, scales = "free", space = "free") +
  scale_fill_manual(values = colors$cell_type_epi) +
  guides(
    x = guide_axis(angle = 90),
    y = guide_axis(cap = "both"),
    fill = guide_legend(keywidth = 0.4, keyheight = 0.4)
  ) +
  labs(y = "Epithelial Type\nProportion", x = NULL, fill = "Epithelial Type")

p_prop <- free(p_branch_prop, type = "label", side = "l") + free(p_lean, type = "label", side = "l") +
  free(p_epi_type_prop, type = "label", side = "l") +
  plot_layout(ncol = 1, guides = "collect") &
  theme(
    strip.text = element_blank(),
    axis.text.x = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(angle = 0, vjust = 0.5, size = 8),
    axis.text.y = element_text(size = 8)
  ) &
  scale_y_continuous(expand = c(0, 0), breaks = c(0, 0.5, 1))

pcomb <-
  free((p_linprob_vln + labs(x = NULL) + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), plot.margin = margin(t = tb_pad, b = 0, unit = "lines"))), type = "label", side = "l") +
  free((p_linprob_dediff + labs(x = NULL) + theme(strip.text = element_blank(), strip.background = element_blank(), axis.lines = element_blank(), strip.clip = "off", panel.border = element_rect(fill = NA), axis.text.x = element_blank(), axis.ticks.x = element_blank(), plot.margin = margin(t = tb_pad, b = 0, unit = "lines"))), type = "label", side = "l") +
  free((p_prop + theme(plot.margin = margin())), type = "label", side = "l") + (p_anno + labs(x = "Sample ID") + theme(plot.margin = margin(t = 0, b = tb_pad, unit = "lines")) + guides(x = guide_axis(angle = 90))) + plot_layout(ncol = 1, heights = c(1, 1, 1.3, 0.6))
ggsave(pcomb, file = here("plots", "lineage_prob_dotviolin_anno.pdf"), width = 11, height = 8)
pcomb

TP53mut TN

p <- srt_ti@meta.data %>%
  mutate(TP53mut = case_when(
    has_impact == F ~ "NA",
    tp53mut == "TRUE" ~ "TP53mut",
    tp53mut == "FALSE" ~ "TP53wt",
    .default = "Other"
  )) %>%
  filter(time_point == "TN" & TP53mut != "NA") %>%
  ggplot(aes(x = fct_rev(TP53mut), y = luad_dediff)) +
  geom_violin(scale = "width", aes(fill = TP53mut), show.legend = F) +
  scale_fill_manual(values = rev(pal_aaas()(2))) +
  geom_boxplot(width = 0.2, outliers = F) +
  scale_y_continuous(breaks = c(0, 0.5, 1)) +
  guides(
    y = guide_axis(cap = "both"),
    x = guide_axis(cap = "both")
  ) +
  ggpubr::geom_pwc(label = "p.signif", tip.length = 0, vjust = 0.5) +
  coord_cartesian(clip = "off") +
  labs(x = "Treatment Naive", y = "LUAD dedifferentiation")
ggsave(p, file = here("plots", "TP53mut_TN_luad_dediff.pdf"), width = 1.75, height = 2)
p

srt_epi <- readRDS(here("paper_data", "egfr_epi.rds"))
srt_meta <- readRDS(here("paper_data", "egfr_cohort.rds"))@meta.data

prop_df <- srt_meta %>%
  filter(is_tumor_cell == "TRUE") %>%
  mutate(TP53mut = case_when(
    has_impact == F ~ "NA",
    tp53mut == "TRUE" ~ "TP53mut",
    tp53mut == "FALSE" ~ "TP53wt",
    .default = "Other"
  )) %>%
  filter(time_point == "TN" & TP53mut != "NA", is_tumor_cell == "TRUE") %>%
  add_count(sample_id, name = "n_samp") %>%
  add_count(sample_id, cell_type_epi, name = "n_ct") %>%
  mutate(prop = n_ct / n_samp) %>%
  distinct(sample_id, TP53mut, cell_type_epi, prop)

p <- prop_df %>%
  ggplot(aes(x = fct_relevel(cell_type_epi, names(colors$cell_type_epi)), y = prop)) +
  stat_summary(geom = "bar", fun = "median", aes(fill = fct_rev(TP53mut)), position = position_dodge(width = 1)) +
  geom_errorbar(aes(group = fct_rev(TP53mut)),
    stat = "summary", color = "black", alpha = 0.75, width = 0.5,
    fun.min = function(z) {
      quantile(z, 0.25)
    },
    fun.max = function(z) {
      quantile(z, 0.75)
    },
    fun = median, position = position_dodge(width = 1)
  ) +
  guides(x = guide_axis(angle = 90)) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(y = "Median Proportion", x = "Epithelial Cell State", fill = "TP53 status") +
  scale_fill_aaas()
ggsave(p, file = here("plots", "TP53mut_TN_epi_type_proportions.pdf"), width = 6, height = 3)
p

MAPKalt PD

p <- srt_ti@meta.data %>%
  filter(time_point == "PD" & mapk_alt != "NA") %>%
  ggplot(aes(x = fct_rev(mapk_alt), y = luad_dediff)) +
  geom_violin(scale = "width", aes(fill = mapk_alt), show.legend = F) +
  scale_fill_manual(values = rev(pal_aaas()(2))) +
  geom_boxplot(width = 0.2, outliers = F) +
  scale_y_continuous(breaks = c(0, 0.5, 1)) +
  guides(
    y = guide_axis(cap = "both"),
    x = guide_axis(cap = "both")
  ) +
  ggpubr::geom_pwc(label = "p.signif", tip.length = 0, vjust = 0.5) +
  coord_cartesian(clip = "off") +
  labs(x = "Progressive Disease", y = "LUAD dedifferentiation")
ggsave(p, file = here("plots", "MAPKalt_PD_luad_dediff.pdf"), width = 1.75, height = 2)
p